---
title: "Observing interviewer performance in slices or by traces: A comparison of methods to predict interviewers’ individual contributions to interviewer variance"
date: "14 February 2022"
author: "Celine Wuyts and Geert Loosveldt"
geometry: margin = 3cm
output:
  pdf_document:
    fig_caption: yes
    latex_engine: xelatex
    number_sections: yes
---

```{r setup, include = FALSE}

knitr::opts_chunk$set(echo = FALSE, error = FALSE, warning = FALSE, message = FALSE,
                      fig.width = 7, fig.height = 10)

library(tidyverse)
library(ggthemes)
library(cowplot)
library(colortools)
library(kableExtra)
library(stargazer)
library(formattable)

library(lme4)
#library(lmerTest) # detach("package:lmerTest")
library(merTools)

library(sjstats)
library(broom)
library(reghelper)

options(knitr.table.format = "latex", knitr.kable.NA = '.')

```

```{r themes}

KULgold <- rgb(231, 176, 55, maxColorValue = 255)
ESSpink <- rgb(232, 51, 176, maxColorValue = 255)
KULgreen <- rgb(212, 216, 66, maxColorValue = 255)

Colorvec <- c(KULgold, ESSpink, KULgreen)

PSize_portrait <- c(160, 210)

themeKUL <- theme_tufte(base_size = 9) + # base_family = "Calibri"
    theme(axis.title = element_text(size = 9, face = "plain"),
          axis.text = element_text(size = 9),
          axis.line.x = element_line(),
          plot.title = element_blank(),
          legend.title = element_blank(),
          legend.text = element_text(size = 9),
          strip.text = element_text(size = 9, face = "bold"),
          legend.position = "bottom",
          legend.direction = "horizontal",
          legend.box = "vertical",
          legend.spacing = unit(0, "line"),
          legend.key.size = unit(.75, "line"))

```


# Data and methods

```{r get_data}

# Sample figures
analyticsample <- read.csv("20200108_sample.csv", dec = ".", 
                 sep = ";", stringsAsFactors = F)

# ICCs
ICCs <- read.csv("20200108_ICCs.csv", dec = ".", 
                 sep = ";", stringsAsFactors = F)

ICCs <- ICCs %>%
  filter(nobs >= 300) # Drops 13

# Random effects
RE <- read.csv("20200108_RE.csv", dec = ".", 
                 sep = ";", stringsAsFactors = F)

# Interviewers
interviewers <- read.csv("20200108_interviewers.csv" , dec = ".", 
                 sep = ";", stringsAsFactors = F)

# Cumulative interview time measures
speed <- read.csv("speedindicators.ICum.csv" , dec = ".", 
                 sep = ";", stringsAsFactors = F)

RE_cum <- merge(merge(RE, interviewers[c("essround", "cntry", "intnum", "INTNUM_FW", "iWL")], 
                      by = c("essround", "cntry", "intnum")),
                speed, by = c("essround", "cntry", "intnum"), all.x = T)
```

\FloatBarrier

## Analytic sample

**Interviewers**

`r with(analyticsample, nint[essround == 6])` interviewers in ESS6_BEDUT

`r with(analyticsample, nint[essround == 7])` interviewers in ESS7_BEDUT

`r with(analyticsample, nintoverlap[essround == 6])` interviewers worked in (and are included in the analysis for) both survey rounds

**Respondents**

The selected interviewers administered 

`r with(analyticsample, nobs[essround == 6])` interviews in ESS6_BEDUT

`r with(analyticsample, nobs[essround == 7])` interviews in ESS7_BEDUT



## Selection of survey items

```{r select_items}

# Spread by round
ICCs_itembyround <- ICCs %>%
  mutate(essround = paste0("ESS", essround, "_BEDUT")) %>%
  dplyr::select(essround, item, icc_fit_controls, label) %>%
  spread(key = essround, value = icc_fit_controls)

# Number of items
nitems_R6 <- with(ICCs_itembyround, sum(!is.na(ESS6_BEDUT)))
nitems_R7 <- with(ICCs_itembyround, sum(!is.na(ESS7_BEDUT)))
nitems_R6R7 <- with(ICCs_itembyround, sum(!is.na(ESS6_BEDUT) & !is.na(ESS7_BEDUT)))

# Subset items significant and ICC > 1%
ICCs_selected <- ICCs %>%
  filter(LRtest_pvalue < .05)   # icc_fit_controls > .01 & 

# Spread by round
ICCs_selected_itembyround <- ICCs_selected %>%
  mutate(essround = paste0("ESS", essround, "_BEDUT")) %>%
  dplyr::select(essround, item, icc_fit_controls, label) %>%
  spread(key = essround, value = icc_fit_controls)

# Number of items
nitems_selected_R6 <- with(ICCs_selected_itembyround, sum(!is.na(ESS6_BEDUT)))
nitems_selected_R7 <- with(ICCs_selected_itembyround, sum(!is.na(ESS7_BEDUT)))
nitems_selected_R6R7 <- with(ICCs_selected_itembyround, sum(!is.na(ESS6_BEDUT) & !is.na(ESS7_BEDUT)))

```

```{r compute_summary_ICCs}

ICCs_cor <- with(ICCs_itembyround, cor.test(ESS6_BEDUT, ESS7_BEDUT, use = "complete.cases"))
ICCs_selected_cor <- with(ICCs_selected_itembyround, cor.test(ESS6_BEDUT, ESS7_BEDUT, use = "complete.cases"))

```

`r nitems_R6` items for ESS6_BEDUT

`r nitems_R7` items for ESS7_BEDUT

of which `r nitems_R6R7` items were repeated in both survey rounds

Selection criterium (statistical significance) leaves

`r nitems_selected_R6` items for ESS6_BEDUT (`r formattable::percent(nitems_selected_R6/nitems_R6, 0)`)

`r nitems_selected_R7` items for ESS7_BEDUT (`r formattable::percent(nitems_selected_R7/nitems_R7, 0)`)

of which `r nitems_selected_R6R7` items were repeated in both survey rounds  (`r formattable::percent(nitems_selected_R6R7/nitems_R6R7, 0)`)


```{r ICCs_plot, fig.cap = "\\label{fig:ICCs_plot} Intra-interviewer correlation estimates (all items)"}

p1 <- ggplot(ICCs %>%
               mutate(icc_fit_controls_mean = ave(icc_fit_controls, item, FUN = mean),
                      both_rounds = ave(icc_fit_controls, item, FUN = length) == 2) %>%
               arrange(icc_fit_controls_mean) %>%
               mutate(essround = as.factor(paste0("ESS", essround, "_BEDUT")),
                      item = factor(as.factor(item), levels = unique(item)),
                      panel = ifelse(as.numeric(item) > mean(as.numeric(item)), 1, 2)), 
            aes(x = icc_fit_controls, y = item, col = essround, shape = essround, alpha = both_rounds)) +
  geom_point() +
  scale_x_continuous(name = "Intra-interviewer correlation", limits = c(0, NA), breaks = seq(0, .1, .01)) +
  scale_colour_manual(values = c("ESS6_BEDUT" = ESSpink, "ESS7_BEDUT" = KULgold)) +
  scale_shape_manual(values = c("ESS6_BEDUT" = 15, "ESS7_BEDUT" = 19)) +
  scale_alpha_manual(values = c("TRUE" = 1, "FALSE" = .33), guide = F) +
  facet_wrap(. ~ panel, scales = "free_y") +
  themeKUL +
  theme(axis.ticks.y = element_blank(),
        axis.title.y = element_blank(),
        axis.line.x = element_line(),
        strip.text = element_blank(),
        legend.position = "bottom",
        legend.direction = "vertical")

p1

```

```{r ICCs_overviewtable, results = "asis"}

overviewtable <- ICCs_selected_itembyround %>%
  mutate(icc_fit_controls_mean = rowMeans(cbind(ESS6_BEDUT, ESS7_BEDUT), na.rm = T)) %>% 
  arrange(-icc_fit_controls_mean) %>%
  dplyr::select(item, label, ESS6_BEDUT, ESS7_BEDUT) %>%
  mutate(label = gsub("/", ", ", label))

kable(overviewtable,
      row.names = F,
      booktabs = T, 
      longtable = T,
      escape = T,
      digits = 3,
      col.names = c("Name", 
                    "Label",
                    "ESS6 BEDUT",
                    "ESS7 BEDUT"),
      align = c(rep("l", 2), rep("r", 2)),
      caption = paste("\\label{tab:iccoverview} Intra-interviewer correlations for selected survey items")) %>%
  column_spec(c(1, 3:4), width = "1.5cm") %>%
  column_spec(2, width = "8cm") %>%
  add_header_above(c(" " = 2, "Intra-interviewer correlation" = 2)) %>%
  kable_styling(latex_options = c("HOLD_position", "repeat_header"))

```

```{r ICCs_descriptives, results = "asis"}

stargazer(ICCs_itembyround[c("ESS6_BEDUT", "ESS7_BEDUT")],
          header = F,
          median = T, iqr = T,
          title = "Descriptives of intra-interviewer correlations (all survey items)")
```

```{r ICCs_selected_descriptives, results = "asis"}

stargazer(ICCs_selected_itembyround[c("ESS6_BEDUT", "ESS7_BEDUT")],
          header = F,
          median = T, iqr = T,
          title = "Descriptives of intra-interviewer correlations (selected survey items)")
```

\FloatBarrier

## Interviewers' contributions to measurement error

```{r audio_alternative groupings, results = "asis"}
interviewers <- interviewers %>%
  mutate(AUDIO_READING = ((AUDIO_ALL == 2) +
           (AUDIO_ALLQSAME == 2) +
           (AUDIO_ALLREAD == 2) +
           (AUDIO_CLEAR == 2) +
           (AUDIO_COMPLETE == 2) + 
           (AUDIO_NOEXTRA == 2)),
         AUDIO_SUGGESTIVE = ((AUDIO_NOEXAMPLE == 2) +
                               (AUDIO_NOJUDGEMENT == 2) +
                               (AUDIO_NOOPINION == 2) +
                               (AUDIO_NOTINOPTIONS == 2) +
                               (AUDIO_QREPEAT == 2) +
                               (AUDIO_NOTSUGGESTIVE == 2) +
                               (AUDIO_RESPINTERPRET == 2)),
         AUDIO_RUSHING = ((AUDIO_TEMPO == 2) +
                            (AUDIO_ENOUGHTIME == 2)),
         AUDIO_MINOR = AUDIO_NDEV - AUDIO_READING - AUDIO_SUGGESTIVE - AUDIO_RUSHING,
         AUDIO_READINGCAT = ifelse(AUDIO_READING > 0, 1, 0),
         AUDIO_SUGGESTIVECAT = ifelse(AUDIO_SUGGESTIVE > 0, 1, 0),
         AUDIO_RUSHINGCAT = ifelse(AUDIO_RUSHING > 0, 1, 0),
         AUDIO_MINORCAT = ifelse(AUDIO_MINOR > 0, 1, 0))


```

```{r attach_ICCs}
# Note: RE already only contains the selected items

RE <- left_join(RE, interviewers)
RE <- left_join(RE, ICCs[c("essround", "cntry", "item", "icc_fit_controls")])

```

Deviations larger than .5 in absolute value:
`r with(RE, table(abs_condval > .5))`
`r with(RE, prop.table(table(abs_condval > .5)))`

```{r RE_plot, fig.cap = "\\label{fig:RE_plot} Interviewers' individual absolute bias estimates"}

p2 <- ggplot(RE %>%
               mutate(abs_condval_mean = ave(abs_condval, INTNUM_FW, FUN = mean)) %>%
               arrange(abs_condval_mean) %>%
               mutate(essround = as.factor(paste0("ESS", essround, "_BEDUT")),
                      INTNUM_FW = factor(as.factor(INTNUM_FW), levels = unique(INTNUM_FW)),
                      abs_condval = ifelse(abs_condval > .5, .5, abs_condval)), 
            aes(x = abs_condval, y = INTNUM_FW, col = essround, shape = essround)) +
  geom_point(alpha = .25, col = "black") +
  scale_x_continuous(name = "Estimated absolute difference between interviewer-specific intercept and
                     overall intercept", limits = c(0, .51), breaks = seq(0, .5, .05)) +
  scale_shape_manual(values = c("ESS6_BEDUT" = 2, "ESS7_BEDUT" = 3)) +
  #scale_shape_manual(values = c("ESS6_BEDUT" = 15, "ESS7_BEDUT" = 19)) +
  geom_point(inherit.aes = F, aes(x = abs_condval_mean, y = INTNUM_FW), shape = 8, size = 1, col = "black") +
  themeKUL +
  theme(axis.ticks.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.line.x = element_line(),
        strip.text = element_blank(),
        legend.position = "bottom",
        legend.direction = "vertical") +
  guides(colour = guide_legend(override.aes = list(alpha = .5)),
         shape = guide_legend(override.aes = list(alpha = .5)))

p2

```

```{r checkREWLcorrelation}
RE_byinterviewer <- aggregate(abs_condval ~ INTNUM_FW + essround, FUN = mean,  data = RE)
RE_byinterviewer <- left_join(RE_byinterviewer, interviewers, by = c("INTNUM_FW", "essround"))

with(RE_byinterviewer, summary(abs_condval)); with(RE_byinterviewer, quantile(abs_condval, seq(0, 1, .05)))
with(RE_byinterviewer, cor.test(abs_condval, iWL))
```

\FloatBarrier

```{r structure_data_interviewers}

# Spread to byround
audioscore_byround <- interviewers %>%
  mutate(essround = paste0("ESS", essround, "_BEDUT")) %>%
  dplyr::select(essround, INTNUM_FW, AUDIO_NDEV) %>%
  spread(key = essround, value = AUDIO_NDEV)

speed_byround <- interviewers %>%
  mutate(essround = paste0("ESS", essround, "_BEDUT")) %>%
  dplyr::select(essround, INTNUM_FW, speed_mainexclF.mean) %>%
  spread(key = essround, value = speed_mainexclF.mean)

speedtrend_byround <- interviewers %>%
  mutate(essround = paste0("ESS", essround, "_BEDUT")) %>%
  dplyr::select(essround, INTNUM_FW, speedTrend.mean) %>%
  spread(key = essround, value = speedTrend.mean)


# Audio checks frequencies
audiochecks_freq <- read.csv("20200108_audiochecks_freq.csv", dec = ".", 
                 sep = ";", stringsAsFactors = F)

audiovarlabels <- read.csv("var_audio labels.csv", dec = ".", 
                 sep = ";", stringsAsFactors = F)

audiovarlabels <- audiovarlabels%>%
  mutate(checkorder = 1:nrow(audiovarlabels))

audiochecks_freq_checksbyround <- audiochecks_freq %>%
  mutate(essround = paste0("ESS", essround, "_BEDUT")) %>%
  dplyr::select(essround, variable, label, prop) %>%
  spread(key = essround, value = prop)

audiochecks_freq_checksbyround <- left_join(audiochecks_freq_checksbyround, audiovarlabels) %>%
  arrange(checkorder)

```

```{r compute_summary_interviewers}

audio_cor <- with(audioscore_byround, cor.test(ESS6_BEDUT, ESS7_BEDUT, use = "complete.cases"))
speed_cor <- with(speed_byround, cor.test(ESS6_BEDUT, ESS7_BEDUT, use = "complete.cases"))
speedtrend_cor <- with(speedtrend_byround, cor.test(ESS6_BEDUT, ESS7_BEDUT, use = "complete.cases"))

```

## Interviewers' deviation from the standardized interviewing protocol

```{r audio_freq, results = "asis"}

audiotable <- audiochecks_freq_checksbyround %>%
        dplyr::select(label, ESS6_BEDUT, ESS7_BEDUT) 


kable(audiotable %>%
        mutate_at(c("ESS6_BEDUT", "ESS7_BEDUT"), function(x) formattable::percent(x, digits = 1)),
      row.names = F,
      booktabs = T,
      digits = 1,
      col.names = c("Check",
                    "ESS6 BEDUT",
                    "ESS7 BEDUT"),
      align = c("l", rep("r", 2)),
      caption = paste("\\label{tab:audio_freq} Checklist of standardized interviewing protocol")) %>%
  column_spec(1, width = "10cm") %>%
  column_spec(2:3, width = "2.5cm") %>%
  kable_styling(latex_options = c("hold_position")) %>%
  add_header_above(c(" " = 1, "Deviation from check" = 2), escape = F)

```


```{r audio_histogram, fig.cap = "\\label{fig:audio_histogram} Distribution of the interviewers' standardized interviewing deviation score", fig.height = 3}

p3 <- ggplot(data = interviewers %>%
              mutate(essround = as.factor(paste0("ESS", essround, "_BEDUT"))),
            aes(x = AUDIO_NDEV)) +
  geom_bar(alpha = .5, col = NA, fill = "grey", width = 0.75) +
  geom_hline(yintercept = c(5, 10, 15), colour = 'white', width = 0.25) +
  scale_x_continuous(name = "Standardized interviewing deviation score", 
                     breaks = seq(0, 30, 1)) +
  facet_wrap(facets = "essround") +
  themeKUL +
  theme(axis.ticks.y = element_blank(),
        axis.title.y = element_blank(),
        axis.line.x = element_line(),
        legend.position = "none")
p3

```

```{r audio_scatterplot, fig.cap = "\\label{fig:audio_scatterplot} Joint distribution of the two survey rounds' standardized interviewing deviation score", fig.height = 4, fig.width = 4}

p4 <- ggplot(data = subset(audioscore_byround, complete.cases(audioscore_byround)),
              aes(x = ESS6_BEDUT, y = ESS7_BEDUT)) +
  geom_point(size = 2, shape = 21, alpha = .2, fill = "black", col = "white") + 
  scale_x_continuous(name = "Standardized interviewing deviation score, ESS6_BEDUT", 
                     limits = c(-1, 15), breaks = seq(0, 15, 1)) +
  scale_y_continuous(name = "Standardized interviewing deviation score, ESS7_BEDUT", 
                     limits = c(-1, 15), breaks = seq(0, 15, 1)) +
  themeKUL +
  theme(axis.line.x = element_line(),
        axis.line.y = element_line()) 

xplot <- ggplot(subset(audioscore_byround, complete.cases(audioscore_byround)),
                aes(x = ESS6_BEDUT)) + 
  geom_histogram(fill = ESSpink, col = "white", alpha = .5, binwidth = 1) + 
  scale_x_continuous(limits = c(-1, 15)) + 
  themeKUL + 
  theme(axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        axis.text = element_blank())

yplot <- ggplot(subset(audioscore_byround, complete.cases(audioscore_byround)),
                aes(x = ESS7_BEDUT)) + 
  geom_histogram(fill = KULgold, col = "white", alpha = .5, binwidth = 1) +
  scale_x_continuous(limits = c(-1, 15)) + 
  themeKUL + 
  theme(axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        axis.text = element_blank()) + 
  coord_flip()

p4 <- plot_grid(xplot, NULL, p4, yplot, ncol = 2, align = "hv", 
      rel_widths = c(3, 1), rel_heights = c(1, 3))

p4

```

```{r audio_descriptives, results = "asis"}

stargazer(audioscore_byround[c("ESS6_BEDUT", "ESS7_BEDUT")],
          header = F,
          title = "Descriptives of standardized interviewing deviation score")
```



Deviation counts:

`r with(interviewers, table(AUDIO_NDEV, essround))`

`r with(interviewers, prop.table(table(AUDIO_NDEV, essround), 2))`

`r with(interviewers, table(AUDIO_READING, essround))`

`r with(interviewers, table(AUDIO_SUGGESTIVE, essround))`

`r with(interviewers, table(AUDIO_RUSHING, essround))`

`r with(interviewers, prop.table(table(AUDIO_READING > 0, essround), 2))`

`r with(interviewers, prop.table(table(AUDIO_SUGGESTIVE > 0, essround), 2))`

`r with(interviewers, prop.table(table(AUDIO_RUSHING > 0, essround), 2))`

Among the `r analyticsample$nintoverlap[1]` interviewers who worked in both survey rounds, the deviation counts observed in the two survey rounds are unrelated (*r* = `r with(audio_cor, digits(estimate, 2))`, 95% CI [`r with(audio_cor, paste(digits(conf.int, 2), collapse = "-"))`], \autoref{fig:audio_scatterplot}). Although it is possible for interviewers' interviewing practice to improve or deteriorate over time, we would expect at least a moderately positive correlation between the two survey rounds' scores. The lack of any association challenges the reliability of the deviation count as a measure of any stable component of interviewers' interviewing performance. 


\FloatBarrier

## Interviewers' interview speed

```{r speed_scatterplot, fig.cap = "\\label{fig:speed_scatterplot} Joint distribution of the two survey rounds' average interview speed", fig.height = 4, fig.width = 4}

p4 <- ggplot(data = subset(speed_byround, complete.cases(speed_byround)),
              aes(x = ESS6_BEDUT, y = ESS7_BEDUT)) +
  geom_point(size = 2, shape = 21, alpha = .2, fill = "black", col = "white") + 
  scale_x_continuous(name = "Average interview speed, ESS6_BEDUT", 
                     limits = c(0, 6), breaks = seq(0, 6, 1)) +
  scale_y_continuous(name = "Average interview speed, ESS7_BEDUT", 
                     limits = c(0, 6), breaks = seq(0, 6, 1)) +
  themeKUL +
  theme(axis.line.x = element_line(),
        axis.line.y = element_line()) 

xplot <- ggplot(subset(speed_byround, complete.cases(speed_byround)),
                aes(x = ESS6_BEDUT)) + 
  geom_histogram(fill = ESSpink, col = "white", alpha = .5, binwidth = .5) + 
  scale_x_continuous(limits = c(0, 6)) + 
  themeKUL + 
  theme(axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        axis.text = element_blank())

yplot <- ggplot(subset(speed_byround, complete.cases(speed_byround)),
                aes(x = ESS7_BEDUT)) + 
  geom_histogram(fill = KULgold, col = "white", alpha = .5, binwidth = .5) +
  scale_x_continuous(limits = c(0, 6)) + 
  themeKUL + 
  theme(axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        axis.text = element_blank()) + 
  coord_flip()

p4 <- plot_grid(xplot, NULL, p4, yplot, ncol = 2, align = "hv", 
      rel_widths = c(3, 1), rel_heights = c(1, 3))

p4

```


```{r speed_descriptives, results = "asis"}

stargazer(speed_byround[c("ESS6_BEDUT", "ESS7_BEDUT")],
          header = F,
          title = "Descriptives of average interview speed")
```

```{r speedtrend_descriptives, results = "asis"}

stargazer(speedtrend_byround[c("ESS6_BEDUT", "ESS7_BEDUT")],
          header = F,
          title = "Descriptives of average interview acceleration over the questionnaire")
```

```{r speedstabilization}
subset(interviewers, intnum %in% c(111166, 111247, 112734))

p5 <- ggplot(data = subset(speed, intnum %in% c(111166, 111247, 112734)) %>%
               mutate(intnum = as.factor(intnum)),
             aes(x = intervieworder, y = speed_mainexclF.mean, linetype = intnum)) +
  geom_line() +
  scale_x_continuous(name = "Number of completed interviews", 
                     limits = c(0, 20), 
                     breaks = 0:20,
                     labels = c(0:10, rep("", 4), 15, rep("", 4), 20)) +
  scale_y_continuous(name = "Average interview speed", limits = c(2, NA), breaks = seq(0, 5, .5)) +
  scale_linetype_manual(values = c("111166" = "dashed",
                                   "111247" = "solid",
                                   "112734" = "dotted"),
                     labels = c("111166" = "Interviewer 1",
                                "111247" = "Interviewer 2",
                                "112734" = "Interviewer 3")) +
  themeKUL +
  theme(axis.line = element_line(),
        legend.direction = "vertical")

p6 <- ggplot(data = subset(speed, intnum %in% c(111166, 111247, 112734)) %>%
               mutate(intnum = as.factor(intnum)),
             aes(x = intervieworder, y = speedTrend.mean, linetype = intnum)) +
  geom_line() +
  scale_x_continuous(name = "Number of completed interviews", 
                     limits = c(0, 20), 
                     breaks = 0:20,
                     labels = c(0:10, rep("", 4), 15, rep("", 4), 20)) +
  scale_y_continuous(name = "Average acceleration over questionnaire", breaks = seq(-.5, .5, .25)) +
  scale_linetype_manual(values = c("111166" = "dashed",
                                   "111247" = "solid",
                                   "112734" = "dotted"),
                     labels = c("111166" = "Interviewer 1",
                                "111247" = "Interviewer 2",
                                "112734" = "Interviewer 3")) +
  themeKUL +
  theme(axis.line = element_line(),
        legend.direction = "vertical")


```

Among the `r analyticsample$nintoverlap[1]` interviewers who worked in both survey rounds, the average interview speed observed in the two survey rounds are strongly related (*r* = `r with(speed_cor, digits(estimate, 2))`, 95% CI [`r with(speed_cor, paste(digits(conf.int, 2), collapse = "-"))`], \autoref{fig:speed_scatterplot}). The average trend in interview speed over the questionnaire observed in the two survey rounds for these interviewers, on the other hand, is unrelated (*r* = `r with(speedtrend_cor, digits(estimate, 2))`, 95% CI [`r with(speedtrend_cor, paste(digits(conf.int, 2), collapse = "-"))`]). An interviewer's average interview speed thus appears to be a fairly stable characteristic of his or her interviewing performance, while trends in interview speed over the questionnaire may be determined by, and/or adapted to, the respondents' role performance.

\FloatBarrier


# Results

```{r models}

# Model 0
spec_0 <- "abs_condval ~ essround + iWL + (1|item) + (1|INTNUM_FW)"

fit_0 <- lmer(spec_0, data = RE, na.action = na.exclude)
fit_00 <- update(fit_0, . ~ . - iWL)

# Model A and A'
fit_audio <- update(fit_0, . ~ . + AUDIO_NDEV)
fit_audioalt <- update(fit_0, . ~ . + AUDIO_READINGCAT + AUDIO_SUGGESTIVECAT + AUDIO_RUSHINGCAT)

# Model B and B'
fit_speed <- update(fit_0, . ~ . + speed_mainexclF.mean + speedTrend.mean)
fit_speedtrend <- update(fit_0, . ~ . + speedTrend.mean)

# Model C and C'
fit_final <- update(fit_0, . ~ . + AUDIO_NDEV + speed_mainexclF.mean + speedTrend.mean)
fit_finalalt <- update(fit_0, . ~ . + AUDIO_READINGCAT + AUDIO_SUGGESTIVECAT + AUDIO_RUSHINGCAT + speed_mainexclF.mean + speedTrend.mean)

# # Additional interaction term tests
# fit_speed_quadratic <- update(fit_audio, . ~ . + I(speed_mainexclF.mean^2) + I(speedTrend.mean^2))
# fit_audio_interact <- update(fit_0, . ~ . + AUDIO_NDEV*iWL + AUDIO_NDEV*icc_fit_controls)
# fit_speed_interact <- update(fit_0, . ~ . + speed_mainexclF.mean*iWL + speedTrend.mean*iWL +
#                       speed_mainexclF.mean*icc_fit_controls + speedTrend.mean*icc_fit_controls)

# Excluding workload
fit_audio2 <- update(fit_00, . ~ . + AUDIO_NDEV)
fit_audioalt2 <- update(fit_00, . ~ . + AUDIO_READINGCAT + AUDIO_SUGGESTIVECAT + AUDIO_RUSHINGCAT)
fit_speed2 <- update(fit_00, . ~ . + speed_mainexclF.mean + speedTrend.mean)
fit_speedtrend2 <- update(fit_00, . ~ . + speedTrend.mean)
fit_final2 <- update(fit_00, . ~ . + AUDIO_NDEV + speed_mainexclF.mean + speedTrend.mean)
fit_finalalt2 <- update(fit_00, . ~ . + AUDIO_READINGCAT + AUDIO_SUGGESTIVECAT + AUDIO_RUSHINGCAT + speed_mainexclF.mean + speedTrend.mean)

# Add workload interaction
fit_audio3 <- update(fit_0, . ~ . + iWL*AUDIO_NDEV)
fit_speed3 <- update(fit_0, . ~ . + iWL*speed_mainexclF.mean + iWL*speedTrend.mean)
fit_speedtrend3 <- update(fit_0, . ~ . + iWL*speedTrend.mean)
fit_final3 <- update(fit_0, . ~ . + iWL*AUDIO_NDEV + iWL*speed_mainexclF.mean + iWL*speedTrend.mean)
fit_finalalt3 <- update(fit_0, . ~ . + iWL*AUDIO_READINGCAT + iWL*AUDIO_SUGGESTIVECAT + iWL*AUDIO_RUSHINGCAT + iWL*speed_mainexclF.mean + iWL*speedTrend.mean)



# List of model fits
fits <- list("fit_00" = fit_00, "fit_0" = fit_0, 
             "fit_audio" = fit_audio, "fit_audioalt" = fit_audioalt, 
             "fit_speed" = fit_speed, 
             "fit_final" = fit_final, "fit_finalalt" = fit_finalalt)

fits_std <- lapply(fits, beta)

fits_estimates <- purrr::map_df(fits, tidy, .id = "model")
fits_stdestimates <- purrr::map_df(fits_std, function(fit) tidy(coef(fit)), .id = "model")
fits_modelfit <- purrr::map_df(fits, function(fit) broom::glance(refitML(fit)), .id = "model")
fits_nobs <- purrr::map_df(fits, function(fit){
  data.frame(n = nobs(fit), J = ngrps(fit)[1], K = ngrps(fit)[2])}, .id = "model")

# Extract variance components
fits_varcomp <- lapply(fits, function(fit) as.data.frame(VarCorr(fit)))
fits_var_interviewers <- as.character(digits(lapply(fits_varcomp, function(x) x$vcov[x$grp == "INTNUM_FW"]), 6))
fits_var_items <- as.character(digits(lapply(fits_varcomp, function(x) x$vcov[x$grp == "item"]), 6))
fits_var_residual <- as.character(digits(lapply(fits_varcomp, function(x) x$vcov[x$grp == "Residual"]), 6))
fits_icc_interviewers <- as.character(digits(lapply(fits_varcomp, function(x) x$vcov[x$grp == "INTNUM_FW"]/sum(x$vcov)), 6))
fits_icc_items <- as.character(digits(lapply(fits_varcomp, function(x) x$vcov[x$grp == "item"]/sum(x$vcov)), 6))

intvar_0 <- fits_varcomp[["fit_0"]]$vcov[fits_varcomp[["fit_0"]]$grp == "INTNUM_FW"]
fits_intvar_explained <- lapply(fits_varcomp, function(x) (intvar_0 - x$vcov[x$grp == "INTNUM_FW"])/intvar_0)


# Model fits
fits_AIC <- as.character(comma(lapply(fits, function(fit) AIC(refitML(fit))), digits = 0))

```


Model A versus model 0
`r anova(fit_audio, fit_0)`

Model A' versus model 0
`r anova(fit_audioalt, fit_0)`

Model B vesus model 0
`r anova(fit_speed, fit_0)`

Model B with quadratic terms verus Model B
`r anova(fit_speed_quadratic, fit_speed)`

Model B vesus model A
`r anova(fit_speed, fit_audio)`

Model C vesus model A
`r anova(fit_final, fit_audio)`

Model C vesus model B
`r anova(fit_final, fit_speed)`


Three standard deviations x std. coef x standard deviation of variance contribution 
`r 10/with(interviewers, sd(AUDIO_NDEV))*0.0547166915550853*with(RE, sd(abs_condval))`

Number of deiating behaiours x coef:
`r 10*0.001440772`

```{r predictionplots}
# Prediction data
newdata_audio = expand.grid(AUDIO_NDEV = with(interviewers, seq(min(AUDIO_NDEV), max(AUDIO_NDEV), 1)),
                               iWL = with(interviewers, mean(iWL)), essround = c(6, 7), item ="a", INTNUM_FW = 1)
newdata_audioalt = expand.grid(AUDIO_READINGCAT = c(0, 1), AUDIO_SUGGESTIVECAT = c(0, 1), AUDIO_RUSHINGCAT = c(0, 1),
                               iWL = with(interviewers, mean(iWL)), essround = c(6, 7), item ="a", INTNUM_FW = 1)
newdata_speed = expand.grid(speedTrend.mean = with(interviewers, seq(min(speedTrend.mean), max(speedTrend.mean), .1)),
                            speed_mainexclF.mean = with(interviewers, mean(speed_mainexclF.mean)),
                               iWL = with(interviewers, mean(iWL)), essround = c(6, 7), item ="a", INTNUM_FW = 1)

newdata_audio$abs_condvar_pred <- predict(fit_audio, newdata_audio, re.form = NA)
newdata_audioalt$abs_condvar_pred <- predict(fit_audioalt, newdata_audioalt, re.form = NA)
newdata_speed$abs_condvar_pred <- predict(fit_speed, newdata_speed, re.form = NA)
# 
# 
# p4 <- ggplot(newdata_audio %>%
#                mutate(essround = paste0("ESS", essround, "_BEDUT")), 
#              aes(x = AUDIO_NDEV, y = abs_condvar_pred, linetype = essround)) +
#   geom_line() +
#   geom_point() +
#   scale_y_continuous(name = "Predicted absolute contribution to interviewer variance", limits = c(0, .14)) +
#   scale_x_continuous(name = "Standardized interviewing deviation count") +
#   themeKUL +
#   theme(axis.line = element_line())
# 
# p5 <- ggplot(newdata_audioalt %>%
#                mutate(essround = paste0("ESS", essround, "_BEDUT"),
#                       AUDIO_CAT = rep(c("None", "(1)", "(2)", "(1) and (2)", "(3)", "(1) and (3)", "(2) and (3)", "(1), (2) and (3)"), 2)), 
#              aes(x = AUDIO_CAT, y = abs_condvar_pred, linetype = essround)) +
#   geom_point() +
#   scale_y_continuous(name = "Predicted absolute contribution to interviewer variance", limits = c(0, .14)) +
#   scale_x_discrete(name = "Standardized interviewing category") +
#   themeKUL +
#   theme(axis.line = element_line())
# 
# p6 <- ggplot(newdata_speed %>%
#                mutate(essround = paste0("ESS", essround, "_BEDUT")), 
#              aes(x = speedTrend.mean, y = abs_condvar_pred, linetype = essround)) +
#   geom_line() +
#   geom_point() +
#   scale_y_continuous(name = "Predicted absolute contribution to interviewer variance", limits = c(0, .14)) +
#   scale_x_continuous(name = "Acceleration over the questionnaire") +
#   themeKUL +
#   theme(axis.line = element_line())
```



\newpage
\blandscape

```{r models_table, results = "asis"}

stargazer(fit_0, fit_audio, fit_audioalt, fit_speed, fit_final, fit_finalalt,
          header = FALSE,
          digits = 4,
          type = "latex",
          title = "\\label{tab:models} Fixed parameter estimates for models explaining interviewers' contributions to interviewer variance",
          model.numbers = FALSE,
          column.labels = c("Model 0", "Model A", "Model A'", "Model B", "Model C", "Model C'"),
          dep.var.caption = "",
          dep.var.labels.include = FALSE,
          order = c("AUDIO_NDEV",
                    "AUDIO_READINGCAT", "AUDIO_SUGGESTIVECAT", "AUDIO_RUSHINGCAT",
                    "speed_mainexclF.mean", "speedTrend.mean",
                    "iWL", "essround", "(Intercept)"),
          covariate.labels = c("AUDIO_NDEV" = "Standardized interviewing deviation count",
                               "AUDIO_READINGCAT" = "At least one flag for question reading",
                               "AUDIO_SUGGESTIVECAT" = "At least one flag for deviation from neutrality",
                               "AUDIO_RUSHINGCAT" = "At least one flag for inadequate interview pace",
                               "speed_mainexclF.mean" = "Average interview speed",
                               "speedTrend.mean" = "Average acceleration over questionnaire",
                               "iWL" = "Number of interviews completed by interviewer",
                               "essround7" = "Round 7"),
          keep.stat = "n",
          add.lines = list(c("Interviewer variance", tail(unlist(fits_var_interviewers), -1)),
                           c("Item variance", tail(unlist(fits_var_items), -1)),
                           c("Residual variance", tail(unlist(fits_var_residual), -1)),
                           c("AIC", tail(unlist(fits_AIC), -1), 0)))
```



\elandscape
\newpage 

```{r cumulativemodels}
AICs_speedmodels <- data.frame()

for(i in 1:max(speed$intervieworder)){
  fit <- lme4::lmer(abs_condval ~ essround + pmin(i, iWL) + (1|item) + (1|INTNUM_FW) + 
                      speed_mainexclF.mean + speedTrend.mean,
              data = subset(RE_cum, intervieworder == i & iWL >= 8),
              na.action = na.exclude)
    
  assign(paste0("fit_speed_", i), fit)
  AICs_speedmodels <- rbind(AICs_speedmodels, data.frame(intervieworder = i, 
                                                         AIC = AIC(refitML(fit))))
}

fit_0_8 <- lmer(abs_condval ~ essround + iWL + (1|item) + (1|INTNUM_FW),
              data = subset(RE, iWL >= 8),
              na.action = na.exclude)

AICs_speedmodels_diff <- AICs_speedmodels %>%
  mutate(AICdiff0 = AIC - AIC(refitML(fit_0_8)),
         AICdiffend = AIC - with(subset(AICs_speedmodels, intervieworder == 47), AIC))

```

Number of interviewers:
`r with(interviewers, table(iWL, essround))`
`r with(interviewers, table(iWL >= 8, essround))`
`r with(interviewers, prop.table(table(iWL >= 8, essround), 2))`

Model B intermediate
`r anova(fit_speed_4, fit_0_8)`

`r anova(fit_speed_7, fit_speed_6)`

```{r cumulativemodels_plot, fig.width = 8, fig.height = 6}
p6 <- ggplot(data = subset(AICs_speedmodels_diff, intervieworder <= 8), 
            aes(y = AICdiff0, x = intervieworder)) +
  geom_line(size = 1, color = "black") +
  geom_point(size = 2, color = "black") +
  scale_y_continuous(name = "AIC difference with Model 0") +
  scale_x_continuous(name = "Number of completed interviews", breaks = 1:20) +
  # scale_color_manual(values = c("AICdiff" = "black",
  #                               "AIC_speed_end" = "grey"),
  #                    labels = c("AICdiff" = "Interview speed measures computed over x first completed interviews",
  #                               "AIC_speed_end" = "Interview speed measures computed at end of data collection")) +
  # scale_linetype_manual(values = c("AICdiff" = "dashed",
  #                               "AIC_speed_end" = "solid"),
  #                    labels = c("AICdiff" = "Interview speed measures computed over x first completed interviews",
  #                               "AIC_speed_end" = "Interview speed measures computed at end of data collection")) +
  # scale_size_manual(values = c("AICdiff" = 2,
  #                               "AIC_speed_end" = 0),
  #                    labels = c("AICdiff" = "Interview speed measures computed over x first completed interviews",
  #                               "AIC_speed_end" = "Interview speed measures computed at end of data collection")) +
  themeKUL + 
  theme(legend.position = "none",
        axis.line = element_line())

p6

```




\newpage

# Appendix {-}



